library(dplyr)
library(tidyr)
library(ggplot2)
library(xtable)
library(tikzDevice)
library(readr)

rm(list=ls())
gc()

# directories: define accordingly
fldr_rate_data <-...
fldr_formAform2_data <- ...
fldr_tex <- ...

#===================================================================================
# rate increase data: summary statistics
d_rate <- read.csv(paste0(fldr_rate_data, "cleaned_rate_increase_data.csv"), as.is = TRUE)

# Table A.7: summary stats of the rate increase data
ftn.temp <- function(sample){
  
  temp <- data.frame(c(nrow(distinct(sample, naic_code)),
                       nrow(distinct(sample, naic_code, policy_form)),
                       nrow(distinct(sample, state)),
                       mean(sample$approved),
                       mean(sample$requested_lb),
                       mean(sample$requested_ub),
                       mean(sample$approved_lb),
                       mean(sample$approved_ub),
                       nrow(distinct(sample, naic_code, policy_form, state, yy_requested))))
                        
  rownames(temp) <- c("\\# of insurers",
                      "\\# of policies",
                      "\\# of states",
                      "Share of requests that are approved",
                      "Mean requested lower bound (\\%)",
                      "Mean requested upper bound (\\%)",
                      "Mean approved lower bound (\\%)",
                      "Mean approved upper bound (\\%)",
                      "Number of requests (at policy-state-request year level)")
                      
  return(temp)
}

temp <- ftn.temp(d_rate)

mytable <- xtable(temp, 
                  label="table_rate_increase_data_summary", 
                  caption="Summary statistics of the rate increase data
                  \\newline
                  \\normalsize{\\emph{Notes}: 
                 Some insurers specify a range of the rate increase, e.g., 10-30\%. In this case, we refer to 10\% (30\%) as the lower (upper) bound of the rate request. We say a rate request was approved if the approved upper bound is strictly positive.}", 
                  align = "lr")

print(mytable,
      table.placement = "t",
      hline.after = c(0,  nrow(temp)-1, nrow(temp)),
      include.rownames = TRUE, include.colnames = FALSE, 
      file=paste0(fldr_tex, "summary_stat.tex"),
      sanitize.rownames.function = identity)

rm(temp, mytable)

#===================================================================================
# merge with NAIC data

# load merged FormA-Form2 data
d_naic <- read.csv(file.path(fldr_formAform2_data, "formA_form2_policy_year_level.csv"), as.is = TRUE)

rate_firms    <- sort(unique(d_rate$naic_code))
d_naic_sample <- d_naic[d_naic$naic_code %in% rate_firms, ]
d_naic_sample <- filter(d_naic_sample, year>=min(d_rate$yy_requested), year<=max(d_rate$yy_requested))
rm(rate_firms)

#======================================================================
# denominator is constructed from NAIC data
policy_age     <- sort(unique(d_naic_sample$yrs_since_firstissue))
state_reg_year <- distinct(d_rate, state, yy_regulation)

output_naic <- data.frame(policy_age, 
                          policy_state_combinations_before_reg = NA,
                          policy_state_combinations_after_reg = NA)

for (ii in 1:length(policy_age)) {
  
  dd <- filter(d_naic_sample, yrs_since_firstissue==policy_age[ii])
  
  temp <- dd %>%
    group_by(unique_first_year_issue) %>%
    summarise(plans=n())
  
  temp$states_where_policy_issued_before_reg <- -9999
  temp$states_where_policy_issued_after_reg  <- -9999
  
  for (jj in 1:nrow(temp)) {
    issue_year <- as.numeric(temp[jj, "unique_first_year_issue"])
    temp[jj, "states_where_policy_issued_before_reg"] = nrow(filter(state_reg_year, yy_regulation>issue_year))
    temp[jj, "states_where_policy_issued_after_reg"] = nrow(filter(state_reg_year, yy_regulation<=issue_year))
  }
  
  temp <- mutate(temp, p_s_before = plans*states_where_policy_issued_before_reg,
                 p_s_after=plans*states_where_policy_issued_after_reg)
  
  output_naic[ii, "policy_state_combinations_before_reg"] <- sum(temp$p_s_before)
  output_naic[ii, "policy_state_combinations_after_reg"]  <- sum(temp$p_s_after)
}

#======================================================================
# numerator is from the rate increase data

temp1 <- filter(d_rate, sold_after_regulation==0) %>%
  group_by(policy_age_at_request) %>%
  summarise(policy_state_requests_before_reg=n())

temp2 <- filter(d_rate, sold_after_regulation==1) %>%
  group_by(policy_age_at_request) %>%
  summarise(policy_state_requests_after_reg=n())

output_rate_request <- full_join(temp1, temp2, by="policy_age_at_request")
rm(temp1, temp2)

temp1 <- filter(d_rate, sold_after_regulation==0, approved==1) %>%
  group_by(policy_age_at_request) %>%
  summarise(policy_state_approvals_before_reg=n())

temp2 <- filter(d_rate, sold_after_regulation==1, approved==1) %>%
  group_by(policy_age_at_request) %>%
  summarise(policy_state_approvals_after_reg=n())

output_rate_approval <- full_join(temp1, temp2, by="policy_age_at_request")
rm(temp1, temp2)

#======================================================================
# merge the summary data

names(output_naic)[which(names(output_naic)=="policy_age")]<- "policy_age_at_request"

output <- full_join(output_rate_request, output_naic, by="policy_age_at_request")
output <- full_join(output_rate_approval, output, by="policy_age_at_request")

output <- mutate(output, 
                 sh_request_before    = policy_state_requests_before_reg/policy_state_combinations_before_reg,
                 sh_request_after     = policy_state_requests_after_reg/policy_state_combinations_after_reg,
                 sh_approval_before   = policy_state_approvals_before_reg/policy_state_combinations_before_reg,
                 sh_approval_after    = policy_state_approvals_after_reg/policy_state_combinations_after_reg,
                 approval_rate_before = policy_state_approvals_before_reg/policy_state_requests_before_reg,
                 approval_rate_after  = policy_state_approvals_after_reg/policy_state_requests_after_reg)

rm(output_rate_request, output_rate_approval, output_naic)

#======================================================================
# change the data format for easier ggplot applications
output_before <- select(output, 
                        policy_age_at_request,
                        policy_state_approvals_before_reg,
                        policy_state_requests_before_reg,
                        policy_state_combinations_before_reg,
                        sh_request_before,
                        sh_approval_before,
                        approval_rate_before)
output_before$soldafterregulation <- 0

output_after <- select(output, 
                       policy_age_at_request,
                       policy_state_approvals_after_reg,
                       policy_state_requests_after_reg,
                       policy_state_combinations_after_reg,
                       sh_request_after,
                       sh_approval_after,
                       approval_rate_after)
output_after$soldafterregulation <- 1

temp_names <-  c("policy_age_at_request", 
                 "policy_state_approvals", 
                 "policy_state_requests", 
                 "policy_state_all", 
                 "sh_request",
                 "sh_approval", 
                 "approval_rate", 
                 "soldafterregulation")
names(output_before) <- temp_names
names(output_after)  <- temp_names

output <- bind_rows(output_before, output_after)

output$soldafterregulation <- as.factor(output$soldafterregulation)
rm(output_before, output_after, temp_names)

#======================================================================
# Figure 4

f_width_n  = 1.5
f_height_n = 6

# Figure 4: extensive margin
tikz(file=paste0(fldr_tex, "share_approvals_by_age_n.tex"), f_width_n, f_height_n)
ggplot(data=filter(output, policy_state_requests>=30, policy_age_at_request<=14, policy_age_at_request>=6), 
       aes(x=policy_age_at_request, 
           y=sh_approval, 
           group=soldafterregulation, 
           color=soldafterregulation)) + 
  geom_point(size=3) + geom_line(size=2) +
  scale_colour_manual(name="Sold after reg.", values = c(before_color, after_color)) +
  theme(legend.position = "bottom", text = element_text(size=text_size)) +
  ylab("Share of plans with rate increases")  +
  xlab("Plan age at request") +
  scale_x_continuous(breaks=seq(min(output$policy_age_at_request), max(output$policy_age_at_request),2)) 
dev.off()

# Figure 4: intensive margin
temp <- d_rate %>%
  group_by(policy_age_at_request, sold_after_regulation) %>%
  summarise(obs = n(),
            m_approved_avg = mean(approved_avg),
            m_sh_increase_approved = mean(sh_increase_approved))

temp <- as.data.frame(temp)
temp$sold_after_regulation <- as.factor(temp$sold_after_regulation)
names(temp) <- gsub("_", "", names(temp))

tikz(file=paste0(fldr_tex, "percent_approved_increase_by_age_n.tex"), f_width_n, f_height_n)
ggplot(data=filter(temp, obs>=30, policyageatrequest<=14, policyageatrequest>=6), 
       aes(x=policyageatrequest, 
           y=mshincreaseapproved, 
           group=soldafterregulation, 
           color=soldafterregulation)) + 
  geom_point(size=3) + geom_line(size=2) +
  scale_colour_manual(name="Sold after reg.", values = c(before_color, after_color)) +
  theme(legend.position = "bottom", text = element_text(size=text_size)) +
  ylab("Ratio of approved to requested increase (mean)")  +
  xlab("Plan age at request") +
  scale_x_continuous(breaks=seq(min(temp$policyageatrequest), max(temp$policyageatrequest),2)) 
dev.off()










